
import re
# Scrape course categories and course names out of the static index page.
#
# Every <section class="main_section"> is expected to hold one <h1> (the
# category title) and any number of <span class="course_name"> elements.
# The result is collected in `data_s` as a list of
# {'category': str, 'course_s': list[str]} dicts and then printed.

# Hold the file open only for the read; parsing does not need the handle.
with open('static/html/index.html', 'r', encoding='utf-8') as f:
    # Newlines are removed because `.` in the patterns below does not match
    # '\n'; without this, a section spanning several lines would be missed.
    html = f.read().replace('\n', '')
print(html)

pattern_section = r'<section class="main_section">(.*?)</section>'
category_pattern = r'<h1>(.*?)</h1>'
course_pattern = r'<span class="course_name">(.*?)</span>'

section_s = re.findall(pattern_section, html)
print(len(section_s))

data_s = []
for section in section_s:
    # Only the first <h1> is used as the category. A section without any
    # <h1> gets an empty category instead of aborting the run with an
    # IndexError, so its courses are still reported.
    category_match = re.search(category_pattern, section)
    category = category_match.group(1) if category_match else ''
    print(category)
    course_s = re.findall(course_pattern, section)
    data_s.append(
        {
            'category': category,
            'course_s': course_s,
        }
    )
print(data_s)

# Report: each category followed by its courses, indented.
for data in data_s:
    print(data['category'])
    for course in data['course_s']:
        print("    ", course)